#delim cr
* Session setup: the line above makes carriage return the command
* delimiter; the settings below control output and logging.

* Do not pause output waiting for a keypress.
set more off
* Variable names must be typed in full; abbreviations are rejected.
set varabbrev off
* Let pause statements take effect.
pause on
graph set ps logo off

* Close any log still open; capture suppresses the error raised
* when no log is open.
capture log close
set linesize 200
set logtype text
* Write this run to a log file, overwriting any earlier copy.
log using ../log/compile-all-environmental-data.log , replace

/* --------------------------------------

We have pollution data from several
sources: Junjie, Shuang, and official Chinese sources.
We compare them here.

--------------------------------------- */

* Start from an empty session before loading any data.
clear all
estimates clear
* NOTE(review): recent Stata releases are understood to manage memory
* automatically and to ignore this setting; confirm whether it is
* still needed for the Stata version this project targets.
set mem 50m

*------------------------------------------------------------
*  Shanghai API readings from EMC
*------------------------------------------------------------

use ../dta/shanghaiAPI.dta , clear
describe , fullnames
summarize

* Display Date as a daily date and inspect its coverage.
format %td Date
codebook Date

describe , fullnames

* Tag each pollutant series with its source:
* PM10api, SO2api, NO2api become emc_api_pm10, emc_api_so2, emc_api_no2.
foreach gas in PM10 SO2 NO2 {
	local suffix = lower("`gas'")
	rename `gas'api emc_api_`suffix'
}

* Park the cleaned data in a tempfile for the merge step.
tempfile emc_data
save `emc_data'

************************************************************
**   Bring in Shanghai data from Shuang
**
**   The input is long (one row per pollutant and date with
**   an api reading).  It is reshaped to one row per Date
**   with shanghai_shuang_api_* columns and saved to the
**   tempfile shuang_shanghai for the merge step.
************************************************************

use ../src/data-from-shuang/API_shanghai.dta , clear
d, f
summarize

tab1 cityname pollutant city , miss
drop cityname city

** Eyeball a random sample of rows.  The seed is fixed so the
** rows shown in the log are the same on every run, and the
** range is capped at _N so a file with fewer than 100 rows
** does not abort the run.
set seed 20130430
gen random = uniform()
sort random
drop random
local nshow = min(100, _N)
list pollutant api in 1/`nshow'

** Build a Stata daily date from the month, date (day of
** month) and year columns.
gen Date = mdy(month, date, year)
format Date %td
codebook Date
drop month date year

** Resolve duplicate pollutant-Date rows.  Ordering by api
** inside each group makes the surviving row deterministic
** (the lowest reading, with missing values sorted last);
** without it the row kept depends on the arbitrary order of
** tied observations.  Only a second copy is dropped, so a
** group with three or more rows still trips the isid check
** further down.
bysort pollutant Date (api): gen N = _n
tab N
list if N > 1
drop if N == 2
drop N

** Trailing underscore so reshape yields api_no2, api_pm10, ...
rename api api_

d, f
** "--" cannot be used as a variable-name suffix; recode to "xx".
replace pollutant = "xx" if pollutant == "--"
isid Date pollutant
reshape wide api_ , i(Date) j(pollutant) string

d, f
foreach var in api_no2 api_pm10 api_so2 api_xx {
	rename `var' shanghai_shuang_`var'
}

tempfile shuang_shanghai
save `shuang_shanghai'

************************************************************
**   Bring in Nantong data from Shuang
**
**   The input is long (one row per pollutant and date with
**   an api reading).  It is reshaped to one row per Date
**   with nantong_shuang_api_* columns and saved to the
**   tempfile shuang_nantong for the merge step.
************************************************************

use ../src/data-from-shuang/API_nantong.dta , clear
d, f
summarize

tab1 prov city_name pollutant _merge*, miss
drop prov city_name _merge*

** The yd column is used directly as the daily date.
gen Date = yd
format Date %td
codebook Date
drop year month date yd day

d, f

** Resolve duplicate pollutant-Date rows.  Ordering by api
** inside each group makes the surviving row deterministic
** (the lowest reading, with missing values sorted last);
** without it the row kept depends on the arbitrary order of
** tied observations.  Only a second copy is dropped, so a
** group with three or more rows still trips the isid check
** further down.
bysort pollutant Date (api): gen N = _n
tab N
list if N > 1
drop if N == 2
drop N

** Trailing underscore so reshape yields api_PM10, api_SO2, ...
rename api api_

d, f
** "--" cannot be used as a variable-name suffix; recode to "xx".
replace pollutant = "xx" if pollutant == "--"
isid Date pollutant
reshape wide api_ , i(Date) j(pollutant) string

** Lower-case the pollutant suffixes to match the Shanghai
** naming.  api_xx already has its final name, so the former
** rename of api_xx to itself has been removed.
rename api_PM10 api_pm10
rename api_SO2 api_so2

d, f
foreach var in api_pm10 api_so2 api_xx {
	rename `var' nantong_shuang_`var'
}

** Date coverage of each series.
codebook Date if nantong_shuang_api_xx != .
codebook Date if nantong_shuang_api_pm10 != .
codebook Date if nantong_shuang_api_so2 != .

tempfile shuang_nantong
save `shuang_nantong'

*------------------------------------------------------------
*  Shanghai API series supplied by Junjie (CSV)
*------------------------------------------------------------

insheet using ../src/data-from-junjie/shanghai_api.csv , comma names clear
describe , fullnames
summarize

tabulate cityname , missing
drop cityname

* The CSV carries the date as month-day-year text; convert it to a
* Stata daily date and discard the string version.
codebook date
generate Date = date(date, "MDY")
format %td Date
codebook Date
drop date

describe , fullnames

* Name the reading after its source.
rename api junjie_api_xx

tempfile junjie
save `junjie'

*------------------------------------------------------------
*  Shanghai consulate Twitter readings (CSV)
*------------------------------------------------------------

insheet using ../src/data-from-junjie/shanghai-consulate-twitter-data.csv , comma names clear
describe , fullnames
summarize

list in 1/10

tabulate province , missing
drop province max_scale avg_scale

* Dates arrive as day-month-year text with a two-digit year,
* which the DM20Y mask reads as 20xx.
generate Date = date(date,"DM20Y")
format %td Date
codebook Date

* Prefix the readings with their source.
rename avg_aqi twitter_aqi
rename max_aqi twitter_aqi_max

describe , fullnames
drop date

tempfile twitter
save `twitter'

************************************************************
**   Bring in our NOAA weather data
**
**   Keeps the Shanghai station only and saves the daily
**   temperature and precipitation columns to the tempfile
**   noaa for the merge step.
************************************************************

** The clear option matches every other load in this file.
** Without it, use refuses to run whenever the data in
** memory have unsaved changes.
use ../src/noaa-data-in-china/cleaned-shanghai-beijing-noaa.dta , clear

d, f
summarize

tab station_name , miss

keep if station_name == "SHANGHAI CH"

rename date Date
format Date %td
codebook Date

keep tmax* tmin* precipitation Date

d, f

tempfile noaa
save `noaa'

*------------------------------------------------------------
*  Nantong temperature series
*------------------------------------------------------------

use ../src/nantong_shanghai_temperatures.dta , clear

describe , fullnames
summarize
codebook date

* Only the date key and the Nantong temperature are carried
* forward; the key is renamed to match the other sources.
keep date nantong_temp
rename date Date

tempfile nantong_temp
save `nantong_temp'

*------------------------------------------------------------
*  Shanghai weather data supplied by Shuang
*------------------------------------------------------------

use ../src/data-from-shuang/Weather_shanghai.dta , clear

describe , fullnames
summarize

* Build a Stata daily date from the month, date (day of month)
* and year columns.
generate Date = mdy(month, date, year)
format %td Date
codebook Date

* Tabulate the columns about to be discarded, then drop them
* together with the raw date parts.
tab1 city cityname gust sndp , missing

drop city cityname gust sndp year month date

describe , fullnames

tempfile shuang_weather
save `shuang_weather'

*------------------------------------------------------------
*  Combine every source on a common daily calendar
*------------------------------------------------------------

* Calendar skeleton covering every date in the CTRIP data:
* one row per day from 1 Jan 2010 through 30 Apr 2013.
clear
local first_day = mdy(1, 1, 2010)
local last_day  = mdy(4, 30, 2013)
set obs `=`last_day' - `first_day' + 1'
generate Date = `first_day' + _n - 1
format %td Date

* Attach each saved source in turn.  Dates that appear only in
* the source (outside the calendar) are tabulated for the log
* and then discarded, so the row count never grows.
foreach source in emc_data shuang_shanghai shuang_nantong junjie twitter noaa shuang_weather nantong_temp {
	merge 1:1 Date using ``source''
	tabulate Date if _merge == 2
	drop if _merge == 2
	drop _merge
}

*------------------------------------------------------------
*  Side-by-side comparison of the sources
*------------------------------------------------------------

sort Date
describe , fullnames

* Pollution readings, grouped by source.
local emc_vars      emc_api_pm10 emc_api_so2 emc_api_no2
local shanghai_vars shanghai_shuang_api_no2 shanghai_shuang_api_pm10 shanghai_shuang_api_so2 shanghai_shuang_api_xx
local nantong_vars  nantong_shuang_api_pm10 nantong_shuang_api_so2 nantong_shuang_api_xx
local other_vars    junjie_api_xx twitter_aqi twitter_aqi_max

list Date `emc_vars' `shanghai_vars' `nantong_vars' `other_vars' , clean

* Weather columns.
list Date precipitation tmax_cel tmin_cel tmax_fah tmin_fah ///
	visib temp dewp slp stp wdsp mxspd prcp frshtt , clean

* Distribution of the gap between the Junjie reading and the
* EMC PM10 reading; the helper variable is temporary.
generate key_difference = junjie_api_xx - emc_api_pm10
tabulate key_difference , missing
drop key_difference

************************************************************
**   Save out our data
************************************************************

** Shrink storage types where possible, then write the merged
** dataset, overwriting any earlier copy.
compress
save ../dta/all-our-environmental-data.dta , replace

** Close the log opened at the top of this file and stop.
log close
exit

